1
# install.packages("tidyverse")
# install.packages("data.table")
library(tidyverse)
library(data.table)
# Read the two daily PM2.5 CSV exports into data.tables; as labelled by the
# object names, data1 holds the 2022 records and data2 the 2002 records.
df2022 <- fread(file = "ad_viz_plotval_data1.csv")
df2002 <- fread(file = "ad_viz_plotval_data2.csv")
# Number of rows and columns in the 2022 table.
c(nrow(df2022), ncol(df2022))
[1] 59756 22
# Number of rows and columns in the 2002 table.
c(nrow(df2002), ncol(df2002))
[1] 15976 22
# List the column names of the 2022 table.
colnames(df2022)
[1] "Date" "Source"
[3] "Site ID" "POC"
[5] "Daily Mean PM2.5 Concentration" "Units"
[7] "Daily AQI Value" "Local Site Name"
[9] "Daily Obs Count" "Percent Complete"
[11] "AQS Parameter Code" "AQS Parameter Description"
[13] "Method Code" "Method Description"
[15] "CBSA Code" "CBSA Name"
[17] "State FIPS Code" "State"
[19] "County FIPS Code" "County"
[21] "Site Latitude" "Site Longitude"
# List the column names of the 2002 table.
colnames(df2002)
[1] "Date" "Source"
[3] "Site ID" "POC"
[5] "Daily Mean PM2.5 Concentration" "Units"
[7] "Daily AQI Value" "Local Site Name"
[9] "Daily Obs Count" "Percent Complete"
[11] "AQS Parameter Code" "AQS Parameter Description"
[13] "Method Code" "Method Description"
[15] "CBSA Code" "CBSA Name"
[17] "State FIPS Code" "State"
[19] "County FIPS Code" "County"
[21] "Site Latitude" "Site Longitude"
# Report the storage class of every column in the 2022 table.
# Each column is inspected on its own: apply(df2022, 2, class) first coerces
# the whole table to a matrix, which turns every value into a string and so
# reports "character" for every column regardless of its real type.
# NOTE: the printed result below was produced by that earlier apply() call and
# therefore shows "character" everywhere; re-run this line to refresh it.
vapply(df2022, function(column) class(column)[1L], character(1))
Date Source
"character" "character"
Site ID POC
"character" "character"
Daily Mean PM2.5 Concentration Units
"character" "character"
Daily AQI Value Local Site Name
"character" "character"
Daily Obs Count Percent Complete
"character" "character"
AQS Parameter Code AQS Parameter Description
"character" "character"
Method Code Method Description
"character" "character"
CBSA Code CBSA Name
"character" "character"
State FIPS Code State
"character" "character"
County FIPS Code County
"character" "character"
Site Latitude Site Longitude
"character" "character"
# Report the storage class of every column in the 2002 table.
# Each column is inspected on its own: apply(df2002, 2, class) first coerces
# the whole table to a matrix, which turns every value into a string and so
# reports "character" for every column regardless of its real type.
# NOTE: the printed result below was produced by that earlier apply() call and
# therefore shows "character" everywhere; re-run this line to refresh it.
vapply(df2002, function(column) class(column)[1L], character(1))
Date Source
"character" "character"
Site ID POC
"character" "character"
Daily Mean PM2.5 Concentration Units
"character" "character"
Daily AQI Value Local Site Name
"character" "character"
Daily Obs Count Percent Complete
"character" "character"
AQS Parameter Code AQS Parameter Description
"character" "character"
Method Code Method Description
"character" "character"
CBSA Code CBSA Name
"character" "character"
State FIPS Code State
"character" "character"
County FIPS Code County
"character" "character"
Site Latitude Site Longitude
"character" "character"
2
# Stack the 2002 and 2022 records into one table.
newdat <- rbind(df2002, df2022)

# Rename by column name rather than by position, so a change in the export's
# column order raises an error instead of silently mislabelling a column.
setnames(
  newdat,
  old = c("Site ID", "Site Latitude", "Site Longitude"),
  new = c("SiteID", "Latitude", "Longitude")
)

# Parse the month/day/year text dates and derive the calendar year.
# year() exists in both lubridate and data.table; the call is namespaced so the
# result (an integer year) does not depend on which package was attached last.
newdat$Date <- lubridate::mdy(newdat$Date)
newdat$Year <- data.table::year(newdat$Date)
3
# install.packages("leaflet")
library(leaflet)
# Colour key for the map: 2002 records red, 2022 records blue.
# Year is numeric, so compare against a number rather than the string "2002".
newdat$color <- ifelse(newdat$Year == 2002L, "red", "blue")

# Keep one row per monitoring site and year. newdat has one row per daily
# observation, so mapping it directly would stack tens of thousands of
# identical markers on top of each other.
site_locations <- distinct(newdat, SiteID, Latitude, Longitude, Year, color)

# Create a basic map of the monitoring sites
leaflet(site_locations) %>%
  addTiles() %>% # Add OpenStreetMap tiles
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    color = ~color,
    label = ~as.character(SiteID) # hover labels are given as text
  )
4
# Count missing values in the daily mean PM2.5 column.
sum(is.na(newdat[["Daily Mean PM2.5 Concentration"]]))
[1] 0
# Tabulate how often each distinct PM2.5 value occurs, then express each count
# as a share of all rows (the share column keeps its existing name, `porp`).
pm25_value_counts <- summarise(
  group_by(newdat, `Daily Mean PM2.5 Concentration`),
  Count = n()
)
mutate(pm25_value_counts, porp = Count / sum(Count))
# A tibble: 833 × 3
`Daily Mean PM2.5 Concentration` Count porp
<dbl> <int> <dbl>
1 -6.7 1 0.0000132
2 -6.3 1 0.0000132
3 -5.1 1 0.0000132
4 -4.7 2 0.0000264
5 -4.1 1 0.0000132
6 -3.1 1 0.0000132
7 -3 1 0.0000132
8 -2.2 2 0.0000264
9 -2.1 1 0.0000132
10 -2 1 0.0000132
# ℹ 823 more rows
# Mean and standard deviation of daily PM2.5, computed separately per year.
records_by_year <- group_by(newdat, Year)
summarise(
  records_by_year,
  MeanPM25 = mean(`Daily Mean PM2.5 Concentration`),
  SdPM25 = sd(`Daily Mean PM2.5 Concentration`)
)
# A tibble: 2 × 3
Year MeanPM25 SdPM25
<int> <dbl> <dbl>
1 2002 16.1 13.9
2 2022 8.43 7.64
5
# Summary statistics (min, quartiles, mean, max) of the daily mean PM2.5 values.
summary(newdat[["Daily Mean PM2.5 Concentration"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
-6.70 4.50 7.60 10.05 12.20 302.50
# Summary statistics (min, quartiles, mean, max) of the daily AQI values.
summary(newdat[["Daily AQI Value"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0 25.0 42.0 43.5 57.0 454.0
# Horizontal boxplot showing the distribution of the daily AQI values.
ggplot(data = newdat, mapping = aes(x = `Daily AQI Value`)) +
  geom_boxplot()

# Scatter plot of daily mean PM2.5 (y) against the daily AQI value (x).
ggplot(
  data = newdat,
  mapping = aes(
    x = `Daily AQI Value`,
    y = `Daily Mean PM2.5 Concentration`
  )
) +
  geom_point()

# Bar chart comparing the mean daily PM2.5 concentration between the years.
newdat %>%
  group_by(Year) %>%
  summarise(MeanPM25 = mean(`Daily Mean PM2.5 Concentration`)) %>%
  # Year is numeric; mapping it directly gives a continuous axis with two thin
  # bars twenty units apart. A factor gives one labelled bar per year.
  ggplot(aes(x = factor(Year), y = MeanPM25)) +
  geom_col() + # geom_col() is the direct form of geom_bar(stat = "identity")
  labs(x = "Year")
